---
title: "Smoothing"
author: "Kohei Watanabe"
date: "`r format(Sys.time(), '%Y-%m-%d')`"
output: html_document
---

```{r setup, include=FALSE}
# Chunk defaults for the whole document: echo the source code and keep code
# and its output in separate blocks.
knitr::opts_chunk$set(
    echo = TRUE,
    collapse = FALSE
)
# Project helpers; the chunks below call accuracy(), test_accuracy(),
# compute_f1(), stri_sub() and %>%, none of which are defined in this file --
# presumably they are defined or attached by this script (TODO confirm).
source("functions.R")
```

```{r fig.width=8, fig.height=5}
# For every model in `pred_country`, plot the smoothed per-topic scores and
# collect one accuracy record; `dat_accu` ends up with one row per model, in
# the order of names(pred_country).
par(mar = c(4.1, 4.1, 1.1, 2.1))
pred_country <- readRDS("data_prediction_country.RDS")

model_names <- names(pred_country)
# Preallocate one slot per model and bind once after the loop, instead of
# growing a data frame with rbind() on every iteration. The list is left
# unnamed on purpose so the resulting row names stay 1..n.
rows <- vector("list", length(model_names))
for (k in seq_along(model_names)) {
    m <- model_names[k]
    cat(m, "\n")
    smooth <- pred_country[[m]][["smooth"]]
    human_topic <- attr(smooth, "topic_human")
    # Predicted topic = name of the column with the highest score in each row.
    accu <- accuracy(colnames(smooth)[apply(smooth, 1, which.max)], human_topic)
    summ <- summary(accu)
    summ["n"] <- nrow(smooth)
    rows[[k]] <- as.list(summ)

    # One colour per column, shared by lines, legend and the rug below.
    # matplot() would otherwise recycle its default 1:6 and disagree with the
    # legend as soon as there are more than six columns.
    topic_cols <- seq_len(ncol(smooth))
    matplot(smooth, type = "l", lty = 1, col = topic_cols, ylim = c(-1, 3),
            ylab = "Log-likelihood ratio", xlab = "Sentence index")
    grid()
    legend("topleft", legend = colnames(smooth), lty = 1, col = topic_cols)
    # Rug at y = -1: the `topic_human` label of each row, coloured like the
    # column of `smooth` with the same name.
    points(seq_along(human_topic), rep(-1, length(human_topic)),
           col = match(human_topic, colnames(smooth)), pch = "|")
}
# Seed with an empty data frame so rbind() dispatches to the data-frame method
# exactly as the incremental version did.
dat_accu <- do.call(rbind, c(list(data.frame()), rows))

dat_accu$f1 <- compute_f1(dat_accu)
# Model names are split positionally: first three characters -> country,
# last four characters -> year.
dat_accu$country <- stri_sub(names(pred_country), 1, 3)
dat_accu$year <- as.numeric(stri_sub(names(pred_country), -4, -1))
```

```{r, results = "asis"}
# F1 of the unsmoothed ("raw") predictions, one value per model. The values
# come out in the order of `pred_country`, so this must run before `dat_accu`
# is re-sorted below.
dat_accu$f1_raw <- pred_country %>%
    lapply(function(pred) summary(test_accuracy(pred[["raw"]]))) %>%
    do.call(rbind, .) %>%
    as.data.frame() %>%
    compute_f1()

# Difference between the F1 on smoothed scores and the F1 on raw scores.
dat_accu$change <- with(dat_accu, f1 - f1_raw)

# Sort by year, then country, and print the comparison table.
dat_accu <- dat_accu[with(dat_accu, order(year, country)), ]
knitr::kable(
    dat_accu[, c("country", "year", "n", "f1_raw", "f1", "change")],
    digits = 3
)
```

```{r}
# Range, mean and standard deviation of the smoothed F1 across models.
with(dat_accu, range(f1))
with(dat_accu, mean(f1))
with(dat_accu, sd(f1))
# Correlation between the number of rows per model (`n`) and the F1 change.
with(dat_accu, cor(n, change))
```


```{r fig.width=8, fig.height=5}
# F1 per country-year: a segment from the raw F1 to the smoothed F1, with a
# point at the smoothed value. The wide bottom margin leaves room for the
# rotated axis labels.
par(mar = c(8.1, 4.1, 1.1, 2.1))
dat_accu$i <- seq_len(nrow(dat_accu))

# Empty canvas; the x axis is drawn by hand further down.
plot(
    dat_accu[["i"]], rep(1, nrow(dat_accu)),
    type = "n", xaxt = "n",
    ylim = c(0.4, 1), xlab = "", ylab = "F1"
)
grid()
segments(
    x0 = dat_accu[["i"]], y0 = dat_accu[["f1_raw"]],
    x1 = dat_accu[["i"]], y1 = dat_accu[["f1"]]
)
points(dat_accu[["f1"]])
axis(
    1, dat_accu[["i"]],
    paste(dat_accu[["year"]], dat_accu[["country"]]),
    las = 2
)
# Dashed reference line at the mean smoothed F1.
abline(h = mean(dat_accu[["f1"]]), lty = 2)
```

```{r}
# Mean and range of the F1 change (smoothed minus raw) across models.
with(dat_accu, mean(change))
with(dat_accu, range(change))
```
